# % of children's education ----

Prob_childeducation <- ssm_filtered |> 
  dplyr::select(ParentHighEdu, ChildCollege_Max, Sex) |> 
  dplyr::summarise(ChildCollege_Max = round(mean(ChildCollege_Max * 100), 1), 
                   .by = c(ParentHighEdu, Sex))

Prob_childeducation <- ssm_filtered |> 
  dplyr::select(ParentHighEdu, ChildCollege_Max) |> 
  dplyr::group_by(ParentHighEdu) |> 
  dplyr::summarise(ChildCollege_Max = round(mean(ChildCollege_Max * 100), 1)) |> 
  dplyr::mutate(Sex = "Overall") |> 
  dplyr::bind_rows(Prob_childeducation)

Prob_childeducation |>
  dplyr::mutate(Sex = forcats::fct_relevel(Sex, "Overall", "Male", "Female")) |> 
  ggplot2::ggplot(ggplot2::aes(x = ParentHighEdu, y = ChildCollege_Max)) +
  ggplot2::geom_col() + 
  ggplot2::geom_text(aes(label = ChildCollege_Max), size = 10, color = "white", vjust = 3) + 
  ggplot2::facet_wrap(~ Sex) +
  ggplot2::labs(x = "Parent's Education", y = "% of University Completion of Children") +
  ggplot2::theme_bw(base_size = 22)

# Overall --------------------------------------------------------------

assignment_overall <- ssm_filtered |> dplyr::filter(ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_overall <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

#### Extract tibble of factural and counterfactual estimates----------------------
Result <- estimate_overall$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result <- estimate_overall$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result)

Result <- Result |> dplyr::mutate(Sex = "Overall")

close_value_oa <- estimate_overall$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "additive") |> select(estimate) |> unlist()

close_prop_oa <- estimate_overall$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "proportional") |> select(estimate) |> unlist()

# Male --------------------------------------------------------------

assignment_male <- ssm_filtered |> dplyr::filter(Sex == "Male" & ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_male <- ssm_filtered |> 
  dplyr::filter(Sex == "Male") |> 
  gapclosing(
    counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

#### Extract tibble of factural and counterfactual estimates----------------------
Result_m <- estimate_male$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result_m <- estimate_male$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result_m)

Result_m <- Result_m |> dplyr::mutate(Sex = "Male")

close_value_m <- estimate_male$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "additive") |> select(estimate) |> unlist()

close_prop_m <- estimate_male$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "proportional") |> select(estimate) |> unlist()


# Female --------------------------------------------------------------

assignment_female <- ssm_filtered |> dplyr::filter(Sex == "Female" & ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()


estimate_female <- ssm_filtered |> 
  dplyr::filter(Sex == "Female") |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education * Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

#### Extract tibble of factural and counterfactual estimates----------------------
Result_f <- estimate_female$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result_f <- estimate_female$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result_f)

Result_f <- Result_f |> dplyr::mutate(Sex = "Female")

close_value_f <- estimate_female$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "additive") |> select(estimate) |> unlist()

close_prop_f <- estimate_female$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "proportional") |> select(estimate) |> unlist()

# Integrated Results -------------------------------------------------------

Result <- Result |> dplyr::bind_rows(Result_m) |> dplyr::bind_rows(Result_f)

Result <- Result |> dplyr::mutate(close_prop = dplyr::case_when(Sex == "Overall" ~ close_prop_oa, 
                                                                Sex == "Male" ~ close_prop_m, 
                                                                Sex == "Female" ~ close_prop_f), 
                                  close_value = dplyr::case_when(Sex == "Overall" ~ close_value_oa, 
                                                                 Sex == "Male" ~ close_value_m, 
                                                                 Sex == "Female" ~ close_value_f))

Result <- Result |> 
  dplyr::filter(Estimates == "Factural") |> 
  dplyr::group_by(Sex, ParentHighEdu) |> 
  dplyr::summarise(beginning = mean(estimate)) |> 
  ungroup() |> 
  dplyr::inner_join(Result, by = c("Sex", "ParentHighEdu"))

Result <- Result |> 
  dplyr::filter(Estimates == "Counterfactual") |> 
  dplyr::group_by(Sex, ParentHighEdu) |> 
  dplyr::summarise(end = mean(estimate)) |> 
  ungroup() |> 
  dplyr::inner_join(Result, by = c("Sex", "ParentHighEdu"))

# Visualize the results -------------------------------------------------------

Result |> 
  dplyr::mutate(Estimates = forcats::fct_relevel(Estimates, "Factural", "Counterfactual"), 
                Sex = forcats::fct_relevel(Sex, "Overall", "Male", "Female")) |> 
  dplyr::rename(`Parental education` = ParentHighEdu) |> 
  dplyr::group_by(Estimates) |> 
  dplyr::ungroup() |> 
  ggplot2::ggplot(aes(x = Estimates, y = estimate, ymin = ci.min, ymax = ci.max)) + 
  ggplot2::geom_point(aes(shape = `Parental education`), position = ggplot2::position_dodge(width = .1), size = 2.5) + 
  ggplot2::geom_errorbar(aes(shape = `Parental education`), position = ggplot2::position_dodge(width = .1), width = 0.1) + 
  ggplot2::geom_segment(aes(x = c(1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05), xend = 1.5,
                            y = estimate, yend = estimate)) +
  ggplot2::geom_segment(aes(x = 1.5, xend = 1.5,
                            y = beginning, yend = end), 
                        linetype = "dashed", 
                        linewidth = .5) +
  ggplot2::geom_label(aes(x = 1.5, label = paste0("absolute: ", round(close_value, 2), "pt", "\n", "proportional: ", round((close_prop * 100), 1), "%", "\n", "gap closed"),
                          y = 3.05),
                      position = ggplot2::position_dodge(width = .1),
                      size = 5,
                      show.legend = FALSE) +
  ggplot2::facet_wrap(~ Sex) + 
  ggplot2::labs(x = "Settings", y = "Mean of Subjective Health") + 
  ggplot2::theme_bw(base_size = 18) + 
  ggplot2::scale_shape_manual(values = c(4, 1)) + 
  ggplot2::theme(legend.position = "top") + 
  ggplot2::scale_y_continuous(limits = c(3.0, 3.6))

# Table

Result_table <- Result |> 
  dplyr::select(Sex, ParentHighEdu, estimate, se, ci.min, ci.max, Estimates) |> 
  dplyr::mutate(Estimates = forcats::fct_relevel(Estimates, "Factural", "Counterfactual"), 
                Sex = forcats::fct_relevel(Sex, "Overall", "Male", "Female")) |> 
  dplyr::mutate(across(c(estimate, se, ci.min, ci.max), 
                       .fns = ~ round(.x, 3))) |> 
  tidyr::pivot_wider(names_from = c(ParentHighEdu), 
                     values_from = c(estimate, se, ci.min, ci.max))

write_csv(Result_table, "../submission_ssm/R-R_2024-01-05/fig/supplement/value.csv")
